library(readr)
library(tidyverse)
library(ggplot2)
library(janitor)
library(tidycensus)
library(viridis)
library(tscount)
library(yrbss)
import_raw_data = FALSE
if(import_raw_data){
ss = read_csv("../../data_1/school_shootings.csv")
colnames(ss) = ss[1,]
ss = ss[-1,]%>%
clean_names()%>%
select(-c(na,na_2))%>%
filter(reliability_score_1_5!=1)
ss2 = read_csv("../../data_1/ss_pt2.csv")%>%
select(-X1)
colnames(ss2)=colnames(ss)[34:ncol(ss)]
ss[(1426:nrow(ss)),(34:ncol(ss))]=ss2
ss$shooter_age = as.numeric(ss$shooter_age)
write.csv(ss,"ss_data.csv")
}
# ss is csv file with school shooting data (individual incidents)
ss = read_csv("ss_data.csv")%>%
select(-X1)%>%
mutate(year = as.numeric(substr(date, (nchar(date)-3),nchar(date))))%>%
mutate(state = ifelse(state == "St. Croix, US Virgin Islands","St.Croix",state))%>%
distinct_at(.vars = c(1:24), .keep_all = T)
# data set to go from state names to abbreviations
states = data.frame(state.name,state.abb)
dc = data.frame(matrix(ncol= 2, nrow = 1))
colnames(dc)=colnames(states)
dc$`state.name` = "District of Columbia"
dc$`state.abb` = "DC"
states = rbind(states,dc)
states$state.name = as.character(states$state.name)
if(import_raw_data){
# 1980-1990
pop8090_raw = read.delim("https://www2.census.gov/programs-surveys/popest/tables/1980-1990/state/asrh/st8090ts.txt" )%>%data.frame()
pop8090_raw = pop8090_raw[c(8:58,64:114),]%>%data.frame()
colnames(pop8090_raw)="var"
pop8090 = data.frame(pop8090_raw$var[1:51],pop8090_raw$var[52:102])
colnames(pop8090)=c("var1","var2")
pop8090 = pop8090%>%
separate(var1, c("state",as.character(seq(1980,1984,by = 1))), extra = "merge")%>%
separate(var2, c("state1",as.character(seq(1985,1990,by = 1))), extra = "merge")%>%
select(c(state,starts_with("19")))
# 1970 - 1980
pop7080_raw = read.delim("https://www2.census.gov/programs-surveys/popest/tables/1980-1990/state/asrh/st7080ts.txt")
pop7080_raw = pop7080_raw[c(10:61,63:114),]%>%data.frame()
colnames(pop7080_raw)="var"
pop7080 = data.frame(pop7080_raw$var[1:51],pop7080_raw$var[53:103])
colnames(pop7080)=c("var1","var2")
pop7080 = pop7080%>%
separate(var1, c("blnk","id","state","1970","1971","1972","1973","1974","1975"), extra = "merge")%>%
separate(var2, c("blnk1","id1","state1",as.character(seq(1976,1980,by = 1))), extra = "merge")%>%
select(c(state,starts_with("19")))
# 1990-2000
pop90_00 = read_csv("../../data/pop_90-00.csv")%>%
rename(state = X1)%>%
select(c(state,3:13))%>%
filter(state != "USA")
colnames(pop90_00)[2:ncol(pop90_00)]=as.character(seq(1990,2000,by = 1))
pop90_00$state[30:35] = states$state.name[29:34]
pop90_00$state[40:42] = states$state.name[39:41]
pop90_00$state[9] = states$state.name[51]
pop90_00$state[49] = states$state.name[48]
# 2000-2010
pop00_10 = read.csv("https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/state/st-est00int-agesex.csv")%>%
clean_names()%>%
filter(sex == 0, age == 999)%>%
select(c(name,starts_with("popestimate")))%>%
rename_at(vars(starts_with("popestimate")),funs(substr(.,start = 12, stop = 15)))%>%
rename(state = name)%>%
mutate(state = as.character(state))%>%
mutate(state = ifelse(state=="District of Columbia",as.character(states$state.name[51]),state))%>%
filter(state != "United States")
# 2010-2018
pop10_18 = read_csv("../../data/pop_2010-2018.csv")
colnames(pop10_18)=pop10_18[1,]
pop10_18 = pop10_18%>%
slice(-1)%>%
rename_at(vars(starts_with("Pop")), funs(substr(.,start = 38,stop = 41)))%>%
rename(state = Geography)%>%
select(c(state,starts_with("20")))%>%
mutate(state = ifelse(state=="District of Columbia",as.character(states$state.name[51]),state))
# 2019
pop2019 = read_csv("../../data/2019pop.csv")%>%
clean_names()%>%
select(c(state,pop))%>%
rename("2019"=pop)
pops = left_join(pop7080,pop8090%>%select(-c("1980")), by = "state")%>%
left_join(states,by = c("state"="state.abb"))%>%
mutate(state = state.name)%>%
select(-state.name)%>%
left_join(pop90_00%>%select(-c("1990")), by = "state")%>%
left_join(pop00_10%>%select(-c("2000")), by = "state")%>%
left_join(pop10_18%>%select(-c("2010")), by = "state")%>%
left_join(pop2019,by = "state")%>%
gather(key = "year", value = "population", c(2:51))
write.csv(pops, "state_populations.csv")
}
# read in population data (include abbreviated and full state name)
pops = read_csv("state_populations.csv")%>%
select(-X1)%>%
left_join(states, by = c("state"="state.name"))
sts = unique(ss$state)
yrs = seq(range(ss$year)[1],range(ss$year)[2],by=1)
st_yr = data.frame(state = rep(sts,length(yrs)), year = sort(rep(yrs,length(sts))))%>%
arrange(state)
## data set aggrgated by state and year
ss_counts = ss%>%
group_by(state,year) %>%
summarize(incident_count = n(),
total_victims = sum(total_injured_killed_victims),
total_fatalities = sum(killed_includes_shooter),
total_wounded = sum(wounded),
avg_victims = mean(total_injured_killed_victims),
avg_fatalities = mean(killed_includes_shooter),
ave_wounded = mean(wounded),
avg_shooter_age = mean(shooter_age),
max_shooter_age = max(shooter_age),
min_shooter_age = min(shooter_age),
avg_reliability = mean(reliability_score_1_5),
target = mean(ifelse(targeted_specific_victim_s=="Y",1,
ifelse(targeted_specific_victim_s=="N",0,NA)),na.rm = T),
random_vict = mean(ifelse(random_victims=="Y",1,
ifelse(random_victims=="N",0,NA)),na.rm = T),
bullied = mean(ifelse(bullied_y_n_n_a=="Y",1,
ifelse(bullied_y_n_n_a=="N",0,NA)),na.rm = T),
domestic_violence = mean(ifelse(domestic_violence_y_n=="Y",1,
ifelse(domestic_violence_y_n=="N",0,NA)),na.rm = T))%>%
mutate(in_ss = TRUE)%>%
full_join(st_yr, by = c("state","year"))%>%
left_join(pops%>%rename(full_state_name = state), by = c("state"="state.abb", "year"))%>%
mutate(incident_count = ifelse(is.na(incident_count),0,incident_count))%>%
mutate(in_ss = ifelse(is.na(in_ss),FALSE,in_ss))
# read in state grade data
if(import_raw_data){
grades18 = read_csv("../../data/2018grades.csv")%>%
select(-c(X6,X7))%>%
clean_names()%>%
select(-gun_death_rate_per_100k)%>%
mutate(year = "2018")%>%
rename(grade = x2018_grade)
grades17 = read_csv("../../data/2017grades.csv")%>%
clean_names()%>%
mutate(year = "2017")%>%
rename(grade = x2017_grade)
grades16 = read_csv("../../data/2016grades.csv")%>%
clean_names()%>%
mutate(year = "2016")%>%
rename(grade = x2016_grade)
grades15 = read_csv("../../data/2015grades.csv")%>%
clean_names()%>%
mutate(year = "2015")%>%
rename(grade = x2015_grade)
grades14 = read_csv("../../data/2014grades.csv")%>%
clean_names()%>%
mutate(year = "2014")%>%
rename(grade = x2014_grade)
grades13 = read_csv("../../data/2013grades.csv")%>%
clean_names()%>%
mutate(year = "2013")%>%
rename(grade = x2013_grade)
grades12 = read_csv("../../data/2012grades.csv")%>%
clean_names()%>%
mutate(year = "2012")%>%
rename(grade = x2012_grade)
# combine all year data
grades = bind_rows(grades18,grades17)%>%
bind_rows(grades16)%>%
bind_rows(grades15)%>%
bind_rows(grades14)%>%
bind_rows(grades13)%>%
bind_rows(grades12)
write.csv(grades,"state_grades.csv")
}
grades = read_csv("state_grades.csv")%>%
select(-X1)
gpa_convert = data.frame(letter_grade = c("A","A-","B+","B","B-",
"C+","C","C-","D+","D","D-","F"),
gpa = c(4,3.7,3.3,3,2.7,2.3,2,1.7,1.3,1,0.7,0))
ss_counts = ss_counts%>%
left_join(grades%>%select(state,grade,year),by = c("full_state_name"="state","year"))%>%
left_join(gpa_convert, by = c("grade"="letter_grade"))
media = read_csv("media_data.csv")%>%
separate(`Year-Month`, into = c("year","month"), sep = "-")%>%
clean_names()%>%
select(-starts_with("x"))%>%
group_by(year) %>%
mutate(yearly_articles = sum(articles_shootings_excluding_firearm_laws_and_regulations))%>%
mutate(yearly_shootings = sum(mass_shooting))%>%
mutate(art_per_inc = yearly_articles/yearly_shootings)%>%
ungroup()%>%
mutate(prev_month_art = c(0,articles_shootings_excluding_firearm_laws_and_regulations[1:227]))%>%
mutate(prev_year_art = c(rep(0,12),yearly_articles[1:216]))%>%
mutate(year = as.numeric(year))
ss_counts = ss_counts%>%
left_join(media%>%select(prev_year_art, year,yearly_articles)%>%distinct(), by = "year")
pov=read.csv("census_poverty_data.csv",stringsAsFactors = FALSE)
pov=filter(pov,State!=" United States"&State!=" United States"& State!="District of Columbia"&State!="United States")
names(pov)[1:2]<-c("state","poverty")
names(pov)[4]<-"year"
skel=expand_grid(sort(unique(pov$state)),min(pov$year):(max(pov$year)+2))
names(skel)<-c("state","year")
sk1=filter(skel,year==2007|year==2008|year==2009)
sk1=left_join(sk1,filter(pov,year==2007),by="state")
sk2=filter(skel,year==2010|year==2011|year==2012)
sk2=left_join(sk2,filter(pov,year==2010),by="state")
sk3=filter(skel,year==2013|year==2014|year==2015)
sk3=left_join(sk3,filter(pov,year==2013),by="state")
sk4=filter(skel,year==2016|year==2017|year==2018)
sk4=left_join(sk4,filter(pov,year==2016),by="state")
pov=rbind(sk1,sk2,sk3,sk4)
pov=pov[,1:3]
names(pov)<-c("state","year","poverty")
pov = left_join(pov,states, by = c("state"="state.name"))%>%
select(-state)%>%
rename(state = state.abb)
ss_counts = left_join(ss_counts, pov, by = c("state","year"))
bully = read_csv("State_bullying.csv")%>%
select(c(total_score,state.abb))%>%
rename(state = state.abb)%>%
mutate(year = 2010)
ss_counts = left_join(ss_counts, bully%>%select(-year), by = "state")%>%
rename(bullying_score = total_score)%>%
mutate(bullying_score = as.numeric(bullying_score))
mh.data=read.csv("mh.data.csv",stringsAsFactors = FALSE)%>%
clean_names()%>%
select(-x)%>%
left_join(states, by = c("state"="state.name"))%>%
select(-state)%>%
rename(state = state.abb)
ss_counts = ss_counts%>%
left_join(mh.data, by = c("state","year"))%>%
rename(mh_score = percent)
## Warning: Column `state` joining character vector and factor, coercing into
## character vector
# Rand Laws Data
data.rand=read.csv("rand.laws.database.csv",stringsAsFactors = FALSE)
qns = c("qn13","qn14","qn15","qn16","qn17","qn18","qn19","qn20","qn24","qn25","qn26","qn27","qn28","qn29","qn30","qnbullyweight","qnbullygay")
sts = yrbss::getListOfParticipatingStates()
yrs = c(1991,1993,1995,1997,1999,2001,2003,2005,2007,2009,2011,2013,2015)
if(import_raw_data){
bullying = data.frame(variable = NULL, state = NULL, year = NULL, prop= NULL, ciLB= NULL, ciUB = NULL, n = NULL)
for(v in qns){
for(s in sts){
for(y in yrs ){
p = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$prop
ciL = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$ciLB
ciU = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$ciUB
sample_size = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$n
newrow = data.frame(variable = v, state = s, year = y, prop= p, ciLB= ciL, ciUB = ciU, n = sample_size)
bullying = bind_rows(bullying,newrow)}
}}
lab = data.frame(question = yrbss::yrbss_questions_binary$variable, label = yrbss::yrbss_questions_binary$label)%>%
filter(question %in% qns)
bullying = left_join(bullying,lab, by = c(variable = "question"))
bullying$state[which(bullying$state=="AZB")]="AZ"
write_csv(bullying, "bullying_survey.csv")
}
lab = data.frame(question = yrbss::yrbss_questions_binary$variable, label = yrbss::yrbss_questions_binary$label)%>%
filter(question %in% qns)
# read in csv
bullying_survey = read_csv("bullying_survey.csv")
## Parsed with column specification:
## cols(
## variable = col_character(),
## state = col_character(),
## year = col_double(),
## prop = col_double(),
## ciLB = col_double(),
## ciUB = col_double(),
## n = col_double(),
## label = col_character()
## )
bullying_to_merge = read_csv("bullying_survey.csv")%>%
select(-c(ciLB,ciUB,n,label))%>%
gather(key = stat_type, value = value, prop)%>%
spread(variable, value)%>%
select(-stat_type)
## Parsed with column specification:
## cols(
## variable = col_character(),
## state = col_character(),
## year = col_double(),
## prop = col_double(),
## ciLB = col_double(),
## ciUB = col_double(),
## n = col_double(),
## label = col_character()
## )
colnames(bullying_to_merge)[3:17]=as.character(lab$label[1:15])
bullying_to_merge = bullying_to_merge%>%
clean_names()
ss_counts = left_join(ss_counts, bullying_to_merge, by = c("state","year"))
# check missingness for 2009-2015
sum(!complete.cases(filter(bullying_survey, year>=2009)))/nrow(filter(bullying_survey, year>=2009))
## [1] 0.2949126
### We're going to need to impute values for missing years and states for some of these variables if these data are going to be useful in our analysis
suicide = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
suicide_qns = bullying_survey%>%
filter(variable %in% c("qn27","qn28","qn29","qn30"))
for(y in yrs){
for(st in sts){
dat = filter(suicide_qns,(year ==y & state == st))
suicide_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
samp = mean(dat$n,na.rm = TRUE)
newrow = data.frame(year = y, state = st, score = suicide_score, n = samp)
suicide = bind_rows(suicide,newrow)
}
}
fight = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
fight_qns = bullying_survey%>%
filter(variable %in% c("qn18","qn19","qn20"))
for(y in yrs){
for(st in sts){
dat = filter(fight_qns,(year ==y & state == st))
fight_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
samp = mean(dat$n,na.rm = TRUE)
newrow = data.frame(year = y, state = st, score = fight_score, n = samp)
fight = bind_rows(fight,newrow)
}
}
safety = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
safety_qns = bullying_survey%>%
filter(variable %in% c("qn13","qn14","qn15","qn16","qn17"))
for(y in yrs){
for(st in sts){
dat = filter(safety_qns,(year ==y & state == st))
safety_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
samp = mean(dat$n,na.rm = TRUE)
newrow = data.frame(year = y, state = st, score = safety_score, n = samp)
safety = bind_rows(safety,newrow)
}
}
bull = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
bully_qns = bullying_survey%>%
filter(variable %in% c("qn24","qn25","qnbullyweight","qnbullygay"))
for(y in yrs){
for(st in sts){
dat = filter(bully_qns,(year ==y & state == st))
bully_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
samp = mean(dat$n,na.rm = TRUE)
newrow = data.frame(year = y, state = st, score = bully_score, n = samp)
bull = bind_rows(bull,newrow)
}
}
table(ss$reliability_score_1_5)%>%
data.frame()%>%
ggplot(aes(x = Var1, y = Freq))+
geom_bar(aes(fill = Var1), stat = "identity")+
scale_fill_discrete(name = "Reliability Score")+
labs(x = "Reliability Score", y = "Count")
table(ss$school_type)%>%
data.frame()%>%
ggplot(aes(x = Var1, y = Freq))+
geom_bar(aes(fill = Var1), stat = "identity")+
scale_fill_discrete(name = "School Type")+
labs(x = "School Type", y = "Count")
ss%>%
filter(shooter_age<200)%>%
ggplot(aes(x = reorder(state, shooter_age, FUN = mean), y = shooter_age))+
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
labs(y = "Age of shooter", x = "State", title = "Age of Shooter by State")
ss%>%
ggplot(aes(x = reorder(state, killed_includes_shooter, FUN = mean), y = killed_includes_shooter))+
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
labs(y = "Numbeer Killed (including shooter)", x = "State", title = "Fatalities")
ss%>%
ggplot(aes(x = reorder(state, wounded, FUN = mean), y = total_injured_killed_victims))+
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
labs(y = "Wounded ", x = "State", title = "Number Wounded")
ss%>%
ggplot(aes(x = reorder(state, total_injured_killed_victims, FUN = mean), y = total_injured_killed_victims))+
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
labs(y = "Wounded or Killed", x = "State", title = "Combined Wounded & Fatalities")
table(ss$state)%>%
data.frame()%>%
ggplot(aes(x = Var1, y = Freq))+
geom_bar(aes(x = reorder(Var1,Freq, desc)), stat = "identity")+
labs(x = "State", y = "Count", "Number of incidents by State (overall)")+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))
cc = scales::seq_gradient_pal("green", "red", "Lab")(seq(0,1,length.out=12))
# This will handle the ordering
d <- ss_counts %>%
filter(year %in% seq(2012,2018,by=1))%>%
ungroup() %>% # As a precaution / handle in a separate .grouped_df method
arrange(year, incident_count) %>% # arrange by facet variables and continuous values
mutate(.r = row_number()) # Add a row number variable
ggplot(d, aes(x = .r, y= incident_count, fill = grade)) + # Use .r instead of x
geom_point(data = d%>%filter(incident_count==0),
aes(x = .r, y= incident_count, color = grade),
size = 0.7) +
geom_bar(stat = "identity")+
facet_wrap(~ year, scales = "free_x") + # Should free scales (though could be left to user)
scale_x_continuous( # This handles replacement of .r for x
breaks = d$.r, # notice need to reuse data frame
labels = d$state
)+
scale_fill_manual(values = cc, name = "Grade")+
scale_color_manual(values = cc, guide = FALSE)+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 3.5), legend.position = c(0.7,.15), legend.direction = "horizontal",legend.key.size = unit(1.5,"line"), legend.text = element_text(size = 8), legend.title = element_text(size = 10))
ss_counts%>%
filter(year %in% seq(2012,2018,by=1))%>%
filter(!is.na(grade))%>%
ggplot(aes(x = incident_count , fill = grade))+
geom_histogram(binwidth = 1)+
facet_wrap(.~year)+
scale_fill_manual(values = cc)+
scale_x_continuous(breaks = seq(0,35,by=5))+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 3.5))+
theme_bw()+
labs(x= "Number of School Shootings")
ss_counts%>%
filter(year %in% seq(2012,2018,by=1))%>%
filter(!is.na(grade))%>%
ggplot(aes(x = grade, y = incident_count, color = grade))+
geom_boxplot()+
facet_wrap(.~year)+
theme_bw()+
scale_color_manual(values = cc)+
labs(y= "Number of School Shootings", x = "Grade")
ss_counts%>%
filter(year %in% seq(2012,2018,by=1))%>%
ggplot()+
geom_bar(aes(x = reorder(state,population), y = incident_count), stat = "identity")+
geom_point(aes(x = state, y = log(population), color = "population"), size = .4)+
theme_bw()+
labs(x = "State", y = "Number of School Shootings", title = "Number of School Shooting and Population by State")+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 3.5), legend.position = c(.7, 0), legend.justification = c(1, 0))+
facet_wrap(.~year)+
scale_color_discrete(name = NULL, labels = "Population (log)")
ss_counts%>%
filter(year %in% seq(2012,2018,by=1))%>%
group_by(state) %>%
mutate(ordr = mean(avg_victims, na.rm = T))%>%
filter(ordr!="NaN")%>%
ungroup()%>%
arrange(desc(ordr))%>%
ggplot(aes(x = reorder(state,ordr), y = avg_victims, color = year))+
geom_point(size = 0.6, position = "jitter")+
labs(x = "State", y = "Average number of victims per incident", title ="Victims per School Shooting")+
scale_color_viridis(option = "D")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))
media%>%
ggplot(aes(x = prev_month_art, y = mass_shooting, color = as.numeric(year)))+geom_point(position = "jitter")+
scale_color_continuous(name = "Year")+
labs(y = "Mass Shooting Count", x = "Articles in Previous Month")+
theme_bw()
media%>%
ggplot(aes(y = articles_shootings_excluding_firearm_laws_and_regulations, x= as.numeric(year), color = as.numeric(year)))+
geom_point()+
geom_jitter()+
labs(y = "Monthly Articles",x = "Year")+
theme(legend.position = "none")+
theme_bw()
media%>%
ggplot(aes(y = articles_shootings_excluding_firearm_laws_and_regulations/yearly_shootings, x= as.numeric(year), color = as.numeric(year)))+
geom_point()+
geom_jitter()+
labs(y = "Monthly Articles/Yearly Mass Shootings",x = "Year")+
theme(legend.position = "none")+
theme_bw()
media%>%
ggplot(aes(y = yearly_articles, x= as.numeric(year)))+
geom_point()+
labs(y = "Yearly Articles",x = "Year")+
theme_bw()
school_counts = ss%>%
select(school,city,state)%>%
unite(col = school_city,c(school,city,state), sep = "_")%>%
table(dnn = c("school","count"))%>%
data.frame()%>%
separate(col = school_city, into = c("school","city","state"), sep = "_" )
delta = data.frame(state = NULL, year = NULL, delta_gpa = NULL, delta_ss = NULL, delta_pop = NULL)
for(x in unique(ss_counts$state)){
dat = filter(ss_counts, state == x)%>%
arrange(year)
chng_gpa = c(NA,diff(dat$gpa))
chng_ss = c(NA, diff(dat$incident_count))
chng_pop = c(NA, diff(dat$population))
st = rep(x, nrow(dat))
newrows =data.frame(state = st, year = dat$year, delta_gpa = chng_gpa, delta_ss = chng_ss, delta_pop= chng_pop)
delta = bind_rows(delta,newrows)
}
ss_counts%>%
group_by(year)%>%
summarise(yearly_incidents = sum(incident_count), prev_year_art = prev_year_art[1] )%>%
ggplot(aes(x = year, y = yearly_incidents, fill = prev_year_art))+geom_bar(stat = "identity")+
labs(y = "Yearly School Shootings", x = "Year")+
scale_fill_viridis(option = "D", name = "Articles in previous year")+
theme_bw()
ggplot(data= bullying_survey, aes(x = year, y = prop, color = variable))+
geom_point(position = "jitter")+
geom_smooth(se=F)+
scale_color_viridis_d()+
theme_bw()
ggplot(data= bullying_survey%>%filter(variable %in% qns[9:17]), aes(x = year, y = prop, color = variable))+
geom_point(position = "jitter")+
geom_smooth(se=F)+
scale_color_viridis_d()+
theme_bw()
lab = data.frame(question = yrbss::yrbss_questions_binary$variable, label = yrbss::yrbss_questions_binary$label)%>%
filter(question %in% qns)
kableExtra::kable(lab, byrow = F, caption = "Questions topics")
| question | label |
|---|---|
| qn13 | Carried a weapon |
| qn14 | Carried a gun |
| qn15 | Carried a weapon on school property |
| qn16 | Did not go to school because they felt unsafe at school or on their way to or from school |
| qn17 | Were threatened or injured with a weapon on school property |
| qn18 | Were in a physical fight |
| qn19 | Were injured in a physical fight |
| qn20 | Were in a physical fight on school property |
| qn24 | Were bullied on school property |
| qn25 | Were electronically bullied |
| qn26 | Felt sad or hopeless |
| qn27 | Seriously considered attempting suicide |
| qn28 | Made a plan about how they would attempt suicide |
| qn29 | Attempted suicide |
| qn30 | Attempted suicide that resulted in an injury, poisoning, or overdose that had to be treated by a doctor or nurse |
| qnbullyweight | Been teased b/c of weight past 12 mos |
| qnbullygay | Been teased b/c labeled GLB past 12 mos |
bullying_survey%>%
slice(which(complete.cases(bullying_survey)))%>%
select(label)%>%
table()%>%
data.frame()%>%
kableExtra::kable(caption = "Observations per Question")
| . | Freq |
|---|---|
| Attempted suicide | 298 |
| Attempted suicide that resulted in an injury, poisoning, or overdose that had to be treated by a doctor or nurse | 282 |
| Been teased b/c labeled GLB past 12 mos | 18 |
| Been teased b/c of weight past 12 mos | 18 |
| Carried a gun | 231 |
| Carried a weapon | 275 |
| Carried a weapon on school property | 292 |
| Did not go to school because they felt unsafe at school or on their way to or from school | 298 |
| Felt sad or hopeless | 251 |
| Made a plan about how they would attempt suicide | 291 |
| Seriously considered attempting suicide | 308 |
| Were bullied on school property | 124 |
| Were electronically bullied | 94 |
| Were in a physical fight | 298 |
| Were in a physical fight on school property | 294 |
| Were injured in a physical fight | 270 |
| Were threatened or injured with a weapon on school property | 291 |
fit = glm(incident_count~gpa+poverty+bullying_score+mh_score+ prev_year_art+year+offset(log(population)), family = "poisson", data = ss_counts)
if(FALSE){
ts_y = ts(ss_counts%>%slice(which(complete.cases(ss_counts)))%>%select(incident_count))
ts_x = ss_counts%>%
ungroup()%>%
slice(which(complete.cases(ss_counts)))%>%
select(c(gpa,poverty,bullying_score,yearly_articles,population))%>%
mutate(population = log(population))%>%
as.matrix()
ts_fit = tsglm(ts_y, xreg = ts_x, model = list(past_obs = 1, external = TRUE), link = "log", distr = "poisson")
best_fit = forecast::auto.arima(diff(ts_y), xreg = diff(ts_x))
astsa::sarima(diff(ts_y),p = 0, d = 0, q =0, xreg = diff(ts_x))
arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x))$coef
sqrt(diag(vcov( arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x)))))
cilb= arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x))$coef-sqrt(diag(vcov( arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x)))))
ciub = arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x))$coef+sqrt(diag(vcov( arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x)))))
data.frame(Lower= cilb, Upper = ciub)
}